The data file should have the first two columns with annotation (for example probe ID and gene Symbol). Numerical data start from column 3.
data_file = "TNF_IFN_2.csv"
my_data = read.csv(data_file,sep = '\t')
head(my_data)
## probe gene CTRL CTRL.1 CTRL.2 IFN.2.5 IFN.2.5.1 IFN.2.5.2
## 1 RFC2 RFC2 8.324769 8.324769 8.132764 8.375789 8.373458 8.324769
## 2 HSPA6 HSPA6 5.413300 4.830096 5.929786 8.329249 8.006876 7.950832
## 3 PAX8 PAX8 5.594199 5.581641 5.581641 5.661038 5.581641 5.581641
## 4 UBA7 UBA7 7.361664 7.307654 7.336399 7.336399 7.217897 7.197828
## 5 CCL5 CCL5 9.985708 8.999638 10.371103 4.692704 4.517400 4.996733
## 6 ESRRA ESRRA 6.335024 6.373706 6.539027 6.630504 6.847058 6.778308
## TNF.2.5 TNF.2.5.1 TNF.2.5.2 TNF.IFN.2.5 TNF.IFN.2.5.1 TNF.IFN.2.5.2
## 1 8.324769 8.462647 8.386247 8.324769 8.338911 8.192397
## 2 6.248139 6.207699 5.940508 7.677756 7.484339 7.540568
## 3 5.581641 5.581641 5.581641 5.504055 5.411538 5.545741
## 4 7.671212 7.607462 7.470790 7.420842 7.336399 7.095814
## 5 12.476331 12.450789 12.326590 6.663396 5.642317 6.052862
## 6 6.155354 6.370346 6.397341 6.975653 6.886920 6.539027
#get numeric data
expression_data = my_data[,-(1:2)]
if (max(expression_data)>25) expression_data = log2(expression_data)
#we assume the same number of samples for each condition
samples = ncol(expression_data)/4
#removing uninformative probes (very small coefficient of variation)
cof_cutoff = 0.05
cof = apply(expression_data, 1, function(x) sd(x)/mean(x))
cof_filter = which(cof > cof_cutoff)
#graphical parameters
source("setPowerPointStyle.R")
setPowerPointStyle()
my.pca <- prcomp(t(expression_data[cof_filter, ]), center = TRUE, scale=TRUE)
cols = c(rep("black", samples), rep("red", samples),
rep("blue", samples), rep("yellow", samples))
plot(my.pca$x[, 1], my.pca$x[, 2], col = cols,
xlab = "PC1", ylab = "PC2", pch = 20, cex = 1.5, main = data_file)
legend("bottomleft", pch = 20, col = unique(cols),
legend = c("0","X","Y","X+Y"), bty = 'n',cex = 1)
source("filter_data_on_deltas.R")
design = factor(c(rep("0",samples),rep("X",samples),
rep("Y",samples),rep("Y+X",samples)))
my_data_filtered = filter_data_on_deltas(my_data, design = design)
head(my_data_filtered)
## probe gene CTRL CTRL.1 CTRL.2 IFN.2.5 IFN.2.5.1
## 2 HSPA6 HSPA6 5.413300 4.830096 5.929786 8.329249 8.006876
## 5 CCL5 CCL5 9.985708 8.999638 10.371103 4.692704 4.517400
## 10 PXK PXK 9.294279 9.508841 9.240162 9.846890 10.014370
## 11 MSANTD3 MSANTD3 9.590901 9.274737 9.831871 7.027361 7.053774
## 12 SLC46A1 SLC46A1 6.048298 6.617517 6.384521 7.951326 7.867392
## 14 PIGX PIGX 9.020155 8.955406 9.036642 8.261126 8.799131
## IFN.2.5.2 TNF.2.5 TNF.2.5.1 TNF.2.5.2 TNF.IFN.2.5 TNF.IFN.2.5.1
## 2 7.950832 6.248139 6.207699 5.940508 7.677756 7.484339
## 5 4.996733 12.476331 12.450789 12.326590 6.663396 5.642317
## 10 10.093442 8.939042 9.193210 8.997016 9.972881 10.101698
## 11 7.320024 8.754430 8.966617 8.826732 7.349552 7.153825
## 12 7.562287 6.122274 6.428453 6.443509 8.016323 7.685079
## 14 8.650005 8.832036 8.905883 8.848334 9.028034 9.129482
## TNF.IFN.2.5.2
## 2 7.540568
## 5 6.052862
## 10 10.142173
## 11 7.248851
## 12 7.787337
## 14 8.781471
source("find_optimal_match.R")
my_data_filtered_matched = find_optimal_match(my_data_filtered)
head(my_data_filtered_matched)
## probe gene CTRL CTRL.1 CTRL.2 IFN.2.5 IFN.2.5.1
## 2 HSPA6 HSPA6 5.413300 4.830096 5.929786 8.329249 8.006876
## 5 CCL5 CCL5 9.985708 8.999638 10.371103 4.692704 4.517400
## 10 PXK PXK 9.294279 9.508841 9.240162 9.846890 10.014370
## 11 MSANTD3 MSANTD3 9.590901 9.274737 9.831871 7.027361 7.053774
## 12 SLC46A1 SLC46A1 6.048298 6.617517 6.384521 7.951326 7.867392
## 14 PIGX PIGX 9.020155 8.955406 9.036642 8.261126 8.799131
## IFN.2.5.2 TNF.2.5 TNF.2.5.1 TNF.2.5.2 TNF.IFN.2.5 TNF.IFN.2.5.1
## 2 7.950832 6.248139 6.207699 5.940508 7.677756 7.484339
## 5 4.996733 12.476331 12.450789 12.326590 6.663396 5.642317
## 10 10.093442 8.939042 9.193210 8.997016 9.972881 10.101698
## 11 7.320024 8.754430 8.966617 8.826732 7.349552 7.153825
## 12 7.562287 6.122274 6.428453 6.443509 8.016323 7.685079
## 14 8.650005 8.832036 8.905883 8.848334 9.028034 9.129482
## TNF.IFN.2.5.2 score prof_index
## 2 7.540568 0.290 52
## 5 6.052862 0.536 108
## 10 10.142173 0.426 86
## 11 7.248851 0.498 68
## 12 7.787337 0.660 21
## 14 8.781471 0.598 2
source("visualize_all_profiles.R")
source("setPowerPointStyle.R")
setPowerPointStyle()
visualize_all_profiles(my_data_filtered_matched)
source("generate_all_profiles.R")
generate_all_profiles()